from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, make_scorer
from pathlib import Path
from sklearn.ensemble import BaggingClassifier

def write_hpValues(model_Name, HP_Values, dataset):
    """Append a hyper-parameter record to the model's ``BestParam.txt``.

    The record goes to ``results/<dataset>/<model_Name>/BestParam.txt``,
    relative to the current working directory. The directory is created on
    demand. Each call appends ``str(HP_Values)`` on its own line followed by
    a line of dashes, so records from successive runs accumulate in one file.

    Args:
        model_Name: Model name; used as the innermost directory name.
        HP_Values: Object to record; it is converted with ``str()``.
        dataset: Dataset name; used as the directory below ``results``.

    Returns:
        Always ``0``.
    """
    out_dir = Path("results") / str(dataset) / str(model_Name)
    # parents=True also creates the intermediate "results/<dataset>" level,
    # so a single mkdir call is enough.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Append-only access is all that is needed; a fixed encoding keeps the
    # file identical regardless of the platform's default locale.
    with open(out_dir / "BestParam.txt", "a", encoding="utf-8") as x_file:
        x_file.write(str(HP_Values))
        # The trailing newline ends the separator line so that the next
        # appended record starts on a fresh line instead of after the dashes.
        x_file.write("\n----------------------------------------------------------\n")
    return 0


# Randomized search over a parameter grid: selects the best combination of the listed values.
def DT_hyperparameterTuning(X_train, Y_train, dataset):
    """Run a randomized hyper-parameter search for a decision tree.

    Samples 100 parameter combinations from the grid below and scores each
    one by accuracy using shuffled 5-fold cross-validation. The best
    estimator is recorded through ``write_hpValues`` under the model name
    ``'Bagging'`` and printed.

    Args:
        X_train: Training features, passed unchanged to
            ``RandomizedSearchCV.fit``.
        Y_train: Training labels, passed unchanged to
            ``RandomizedSearchCV.fit``.
        dataset: Dataset name forwarded to ``write_hpValues``.

    Returns:
        The fitted ``RandomizedSearchCV`` object.
    """
    grid_Param = {
        # None (the object, not the string 'None') means "use all features".
        # 'auto' is intentionally not listed: for classifiers it was an alias
        # of 'sqrt', and newer scikit-learn releases reject it.
        'max_features': ['sqrt', 'log2', None],
        'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 30, 50],
        'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 40, 50],
        'criterion': ['gini', 'entropy'],
        'max_depth': [4, 6, 8, 12, 18, 20],
    }

    kfold = KFold(n_splits=5, shuffle=True, random_state=42)

    # Seed the tree like the splitter and the search so that repeated runs
    # evaluate identical candidates and produce identical scores.
    DT = DecisionTreeClassifier(random_state=42)
    grid_search = RandomizedSearchCV(
        estimator=DT,
        param_distributions=grid_Param,
        n_iter=100,
        cv=kfold,
        verbose=2,
        scoring='accuracy',
        # A candidate whose fit raises is scored 0 instead of aborting the
        # search; invalid grid values therefore fail silently.
        error_score=0,
        random_state=42,
        n_jobs=-1,
    )
    grid_search.fit(X_train, Y_train)

    best_grid = grid_search.best_estimator_
    write_hpValues('Bagging', str(best_grid), dataset)

    print(best_grid)
    return grid_search


def Bagging_training_tuning(X_train, Y_train, dataset):
    """Tune a decision tree, then fit a bagging ensemble built on it.

    The tree's hyper-parameters come from ``DT_hyperparameterTuning``. A
    fresh, unfitted tree configured with those parameters is used as the base
    estimator of a 100-member bootstrap ``BaggingClassifier``, which is then
    fitted on the full training data.

    Args:
        X_train: Training features, used for both the search and the final fit.
        Y_train: Training labels, used for both the search and the final fit.
        dataset: Dataset name forwarded to ``DT_hyperparameterTuning``.

    Returns:
        The fitted ``BaggingClassifier``.
    """
    best_random = DT_hyperparameterTuning(X_train, Y_train, dataset)
    dt = DecisionTreeClassifier(**best_random.best_params_)

    # The base estimator is passed positionally: its keyword was renamed from
    # ``base_estimator`` to ``estimator`` across scikit-learn releases, while
    # the first positional slot is the same in both.
    # NOTE: no random_state is given, so the ensemble differs between runs.
    bag_model = BaggingClassifier(dt, n_estimators=100, bootstrap=True)
    bag_model.fit(X_train, Y_train)

    return bag_model
